# python前置知识讲解
# 导入requests库
import requests
import json
from lxml import etree
import openpyxl

# 通过get函数获取数据
# https://voice.baidu.com/act/newpneumonia/newpneumonia/?from=osari_aladin_banner
# response = requests.get(url='https://www.baidu.com/')
# print(response)  # <Response [200]>: 200 = request succeeded, 404 = URL not found, 500 = server-side error
# print(response.text)
# print(response.encoding)
# print(response.apparent_encoding)
# print('*' * 50)
# # Use the encoding detected from the body (apparent_encoding) as the response's encoding
# response.encoding = response.apparent_encoding
# print(response.text)

# kv = {'cn': '导演', 'pcn': '娱乐明星', 'ie': 'utf-8'}
# r = requests.request('GET', 'http://tieba.baidu.com/f/index/forumpark', params=kv)
# r.encoding = r.apparent_encoding
# print(r.text)
#
# dic = {'name': 'xiaohu'}
# json1 = json.dumps(dic)
# print(json1)
#
# print('*' * 100)
# html1 = """
# <!DOCTYPE html>
# <html>
#  <head lang='en'>
#     <meta charset='utf-8'>
#     <title></title>
#  </head>
#  <body>
#     <div id="test-1">小虎</div>
#     <div id="test-2">需要的内容2</div>
#     <div id="testfault">需要的内容3</div>
#  </body>
# </html>
# """
# print(html1, type(html1))
# # 第一步，将爬取下来的页面字符串转换成可以被xpath所分析的html界面
# selector = etree.HTML(html1)
# text1 = selector.xpath('//div[@id="test-2"]/text()')
# print(text1)

# Download the Baidu epidemic page and print the text of every
# <script type="application/json"> element found in it.
# An explicit timeout is passed because requests otherwise waits indefinitely
# on a stalled connection, which would hang the whole script.
response = requests.get(
    'https://voice.baidu.com/act/newpneumonia/newpneumonia/?from=osari_aladin_banner',
    timeout=10,
)
# Decode the body with the encoding detected from its content rather than
# the one guessed from the HTTP headers.
response.encoding = response.apparent_encoding
# Parse the page string into an element tree that XPath can query.
text = etree.HTML(response.text)
print(text.xpath('//script[@type="application/json"]/text()'))

# Build a three-sheet workbook of sample data and write it to disk.


def _append_rows(sheet, rows):
    """Append each entry of *rows* to *sheet* as one worksheet row, in order."""
    for cells in rows:
        sheet.append(cells)


wb = openpyxl.Workbook()

# Sheet 1: the sheet a new workbook starts with, renamed to the student roster.
ws = wb.active
ws.title = "数加科技马鞍山独立营2期学生表"
_append_rows(ws, (
    ['姓名', '年龄', '性别'],
    ['李凯迪', '18岁', '男'],
))

# Sheet 2: student scores.
ws2 = wb.create_sheet(title="学生成绩表")
_append_rows(ws2, (
    ['姓名', '总分'],
    ['李凯迪', '700'],
))

# Sheet 3: update timestamp; column A is widened for the timestamp text.
ws3 = wb.create_sheet(title="更新时间表")
_append_rows(ws3, (
    ['更新时间'],
    ['2022-10-23 10:40:00'],
))
ws3.column_dimensions['A'].width = 23

wb.save("马鞍山独立营信息.xlsx")



